import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
import re
import json

# Site root; relative "next page" hrefs from pagination links are joined onto this.
base_url = "https://coding.imooc.com"
# Accumulates one dict per scraped course, across every crawled page.
course_list = []


def get_course_data(page_url):
    """Crawl course cards starting at *page_url*, following "下一页" (next page) links.

    Appends one dict per course (name, category, enrollment count, price)
    to the module-level ``course_list``; after the last page has been
    crawled, dumps the accumulated data to ``course_data.json``.

    Raises:
        requests.HTTPError: if any page responds with an error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"}

    # Iterate pages with a loop instead of recursion so a site with many
    # pages cannot exhaust the recursion limit.
    while page_url:
        print(f"正在爬取 {page_url} 的课程数据...")
        # timeout keeps a stalled server from hanging the crawler forever;
        # raise_for_status fails fast instead of parsing an HTTP error page.
        resp = requests.get(page_url, headers=headers, timeout=10)
        resp.raise_for_status()

        bs = BeautifulSoup(resp.text, "html.parser")

        for li in bs.find_all("li", class_="course-card"):
            li: Tag
            # Course title
            cname = li.find("p", class_="title ellipsis2").text

            # Category and enrollment count, e.g. "前端 · 1234人报名"
            cat_num_str = li.find("span", class_="numbers l").text
            m = re.search(r"(.*?)· (\d+)人报名", cat_num_str)
            if m is None:
                # Skip cards whose metadata doesn't match the expected
                # pattern instead of crashing with an IndexError.
                continue
            cat, num_str = m.groups()

            # Price, stripping the currency symbol.
            price = li.find("span", class_="price l red bold").text.replace("￥", "")

            course = {
                '课程名称': cname.strip(),
                '课程类型': cat.strip(),
                '报名人数': int(num_str),
                '价格': float(price),
            }
            course_list.append(course)
            print(course['课程名称'])

        # Anchor for the next page; None means this was the last page.
        a_next = bs.find("a", string="下一页")
        page_url = base_url + a_next.get("href") if a_next else None

    # All pages crawled: persist the collected data as pretty-printed JSON,
    # keeping the Chinese keys readable (ensure_ascii=False).
    with open("course_data.json", "w", encoding='utf8') as f:
        json.dump(course_list, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    # Start the crawl from the site root (the first course-listing page).
    get_course_data(base_url)
